
import pymysql
import requests
from bs4 import BeautifulSoup

# 承认签证签发数量
BASE_URL = 'http://cmp.msa.gov.cn/psc2/trainees/list?search_LIKE_traineeNo=&search_LIKE_traineeName=&search_EQ_traineeInspPortCode=&search_EQ_traineeEnabled=&sortName=traineeInspPortCode&sortType=ASC&page='
headers = {
    'Accept': '*/*',
    'Content-Type': 'application/x-www-form-urlencoded'
}
cookies = {
    # OAMAuthnCookie_cmp.msa.gov.cn:80 参数用于登录认证
    'OAMAuthnCookie_cmp.msa.gov.cn:80': '5pHcApVWAxth6keoi02g7r3Iglhymo%2BNhizYzx3yi%2BoZrq%2F6ZoCw3QHRpO%2F%2BhxGPmkwYGHZdmvITnHBBQiECFqsAl7VnRnEFb81rcJ4C4KDqnT7fKEbSkQs6bub229341CTV2bE81c0m8FJ2Li18KqWvJc5r%2BqvCujwrktTTixR1l8Q%2F%2Bp%2FCMGdIo5MV6%2BwHX7RquBhsUOEHgiZoVg8j1LpCrEwtRtxPqGnaua6xBv%2FyIBHfz%2Ff6aPQ48DhL8V8ZMbnEHQQiaxRpRui0rHTtFmCGxsJZChonTc%2FJLKe0TpJetMDHyBg3wi5mzaXgbecCfVOvP1pMwdemoKLPZ1%2Be0Clw56E31vHRZ%2BiGyPgKoZQl6LSnzzqy1pyq7g%2Fv0zyMdyIcErOegFvbJ%2BlsUZ2G81uYPWusj6EuPbZebvyMUj5wA7WYAlcoZFaj2JUPOiA%2FuoDIc5h6Ovf76YlzJXubCbiR5yOhA2Ds8sD49qgIbh8%3D',
    # OAMAuthnHintCookie和JSESSIONID 参数用于声明返回接口数据，不配置的会一直返回HTML
    'OAMAuthnHintCookie': '1',
    'JSESSIONID': '3CCEDD9E28BA32E6A829259B1C5FE9A5'
}

session = requests.Session()
response_lo = session.post(BASE_URL, headers=headers, cookies=cookies)
hrml_content = response_lo.text
# print(hrml_content)
if response_lo.status_code == 200:
    soup = BeautifulSoup(hrml_content, 'lxml')
    # 定义存放所有数据的数组
    all_data = []
    # 获取html中所有的table
    tables = soup.find_all('table')
    # 页面数据主要在第2个table中，所以只要那这个table解析就行
    tableStr = tables[1]
    print(tableStr)
    # 获取table中所有的tr
    rows = tableStr.find_all('tr')
    # 循环获取tr里面所有td的数据
    for row in rows:
        cols = row.find_all('td')
        cols = [ele.text.strip() for ele in cols]
        all_data.append(cols)

    # 数据库连接配置
    db_config = {
        'host': '191.254.8.220',
        'user': 'root',
        'password': 'Szmsa@520',
        'db': 'spiders',
        'charset': 'utf8mb4',
        'cursorclass': pymysql.cursors.DictCursor
    }
    connection = pymysql.connect(**db_config)
    print(all_data)

    try:
        # 开始事务
        with connection.cursor() as cursor:
            # 拼装需要插入字段对应的%号个数
            dataValue = ", ".join(['%s'] * len(all_data[0]))
            # 拼接sql语句
            sql = f"INSERT INTO xt_fsc_trainees (No,Name,NameCn,UserIdcardno,SpecialtyCode,CertDate,Port,SeaExperienceFlag,Enabled,cols1,cols2) VALUES ({dataValue})"
            connection.cursor().executemany(sql, all_data)
            connection.commit()
    finally:
        connection.close()
